library(tidyverse)     # for data cleaning and plotting
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.8
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.1.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(lubridate)     # for date manipulation
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(openintro)     # for the abbr2state() function
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
library(maps)          # for map data
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
library(ggmap)         # for mapping points on maps
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(gplots)        # for col2hex() function
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
library(RColorBrewer)  # for color palettes
library(sf)            # for working with spatial data
## Linking to GEOS 3.9.1, GDAL 3.4.0, PROJ 8.1.1; sf_use_s2() is TRUE
library(leaflet)       # for highly customizable mapping
library(ggthemes)      
library(plotly)        # for the ggplotly() - basic interactivity
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggmap':
## 
##     wind
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(gganimate)     # for adding animation layers to ggplots
library(gifski)        # for creating the gif (don't need to load this library every time,but need it installed)
library(transformr)    # for "tweening" (gganimate)
## 
## Attaching package: 'transformr'
## The following object is masked from 'package:sf':
## 
##     st_normalize
library(shiny)         # for creating interactive apps
library(patchwork)     # for nicely combining ggplot2 graphs
library(gt)            # for creating nice tables
## 
## Attaching package: 'gt'
## The following object is masked from 'package:openintro':
## 
##     sp500
library(countrycode)
theme_set(theme_minimal())
library(geofacet)
library(tidytext)
olympics <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-07-27/olympics.csv')
## Rows: 271116 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): name, sex, team, noc, games, season, city, sport, event, medal
## dbl  (5): id, age, height, weight, year
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
world <- map_data("world")
summer_gold <- olympics %>% 
  filter(season == "Summer") %>% 
  drop_na(medal) %>% 
  filter(medal == "Gold") %>% 
   mutate(countryName = countrycode(noc, "genc3c", "country.name")) %>% 
   mutate(countryName = ifelse(countryName == "USA", "United States", countryName)) %>% 
   mutate(countryName = ifelse(noc == "ALG", "Algeria", countryName)) %>% 
   mutate(countryName = ifelse(noc == "BAH", "Bahamas", countryName)) %>%
   mutate(countryName = ifelse(noc == "BUL", "Bulgaria", countryName)) %>%
   mutate(countryName = ifelse(noc == "CHI", "Chile", countryName)) %>%
   mutate(countryName = ifelse(noc == "CRC", "Costa Rica", countryName)) %>%
   mutate(countryName = ifelse(noc == "CRO", "Croatia", countryName)) %>%
   mutate(countryName = ifelse(noc == "DEN", "Denmark", countryName)) %>%
   mutate(countryName = ifelse(noc == "FIJ", "Fiji", countryName)) %>%
   mutate(countryName = ifelse(noc == "GER", "Germany", countryName)) %>%
   mutate(countryName = ifelse(noc == "GRE", "Greece", countryName)) %>%
   mutate(countryName = ifelse(noc == "GRN", "Grenada", countryName)) %>%
   mutate(countryName = ifelse(noc == "HAI", "Haiti", countryName)) %>%
   mutate(countryName = ifelse(noc == "INA", "Indonesia", countryName)) %>%
   mutate(countryName = ifelse(noc == "IRI", "Iran", countryName)) %>%
   mutate(countryName = ifelse(noc == "LAT", "Latvia", countryName)) %>%
   mutate(countryName = ifelse(noc == "MGL", "Mongolia", countryName)) %>%
   mutate(countryName = ifelse(noc == "NED", "Netherlands", countryName)) %>%
   mutate(countryName = ifelse(noc == "NGR", "Nigeria", countryName)) %>%
   mutate(countryName = ifelse(noc == "POR", "Portugal", countryName)) %>%
   mutate(countryName = ifelse(noc == "PUR", "Puerto Rico", countryName)) %>%
   mutate(countryName = ifelse(noc == "RSA", "South Africa", countryName)) %>%
   mutate(countryName = ifelse(noc == "SLO", "Slovenia", countryName)) %>%
   mutate(countryName = ifelse(noc == "SUI", "Switzerland", countryName)) %>%
   mutate(countryName = ifelse(noc == "UAE", "United Arab Emirates", countryName)) %>%
   mutate(countryName = ifelse(noc == "URU", "Uruguay", countryName)) %>%
   mutate(countryName = ifelse(noc == "VIE", "Vietnam", countryName)) %>%
   mutate(countryName = ifelse(noc == "ZIM", "Zimbabwe", countryName)) %>% 
drop_na(countryName)
## Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some values were not matched unambiguously: ALG, ANZ, BAH, BUL, CHI, CRC, CRO, DEN, EUN, FIJ, FRG, GDR, GER, GRE, GRN, HAI, INA, IOA, IRI, KOS, LAT, MGL, NED, NGR, POR, PUR, RSA, SCG, SLO, SUI, TCH, TPE, UAE, URS, URU, VIE, YUG, ZIM
  #taking time to find the codes that weren't matched and double-checking to see if the countries exist --> add to data set

# Footnote or mention in part of the project the choice to leave out territories not fully recognized, such as Kosovo and Chinese Taipei, and countries that no longer exist, including the Soviet Union and Yugoslavia. 

summer_gold
## # A tibble: 9,806 × 16
##       id name    sex     age height weight team   noc   games  year season city 
##    <dbl> <chr>   <chr> <dbl>  <dbl>  <dbl> <chr>  <chr> <chr> <dbl> <chr>  <chr>
##  1     4 Edgar … M        34     NA     NA Denma… DEN   1900…  1900 Summer Paris
##  2    17 Paavo … M        28    175     64 Finla… FIN   1948…  1948 Summer Lond…
##  3    17 Paavo … M        28    175     64 Finla… FIN   1948…  1948 Summer Lond…
##  4    17 Paavo … M        28    175     64 Finla… FIN   1948…  1948 Summer Lond…
##  5    21 Ragnhi… F        27    163     NA Norway NOR   2008…  2008 Summer Beij…
##  6    42 Thomas… M        25     NA     NA Taifun NOR   1912…  1912 Summer Stoc…
##  7    56 Ren Ab… M        21     NA     NA France FRA   1956…  1956 Summer Melb…
##  8    72 Alekse… M        28    180     83 Belar… BLR   2008…  2008 Summer Beij…
##  9    73 Luc Ab… M        23    182     86 France FRA   2008…  2008 Summer Beij…
## 10    73 Luc Ab… M        27    182     86 France FRA   2012…  2012 Summer Lond…
## # … with 9,796 more rows, and 4 more variables: sport <chr>, event <chr>,
## #   medal <chr>, countryName <chr>
countriesone<-summer_gold %>%
  group_by(countryName) %>% 
  count(medal) %>% 
  arrange(desc(n)) #results in a medal count in descending order

Separate and compare each event and which countries earn the most gold - bar graph of all the countries that win gold - color by event

summer_gold %>% 
  group_by(countryName) %>%
  count() %>% 
  ggplot(aes(x=n, y=reorder(countryName,n)))+
  geom_col()+
  labs(title="Number of gold medals earned per country")

summer_gold%>% 
  group_by(countryName, sport) %>% 
  count() %>% 
  filter(n>20) %>% 
  ungroup %>% 
  mutate(sport2=reorder_within(sport,n,countryName)) %>% 
  ggplot(aes(x=n, y=sport2, fill=sport))+
  geom_col()+
  facet_wrap(vars(fct_reorder(countryName,n,sum,.desc = TRUE)),scales = "free_y")+
  scale_y_reordered()+
  labs(title="Countries that have won more than 20 gold medals")

Line graph

summer_all <- olympics %>% 
  filter(season == "Summer") %>% 
   mutate(countryName = countrycode(noc, "genc3c", "country.name")) %>% 
   mutate(countryName = ifelse(countryName == "United States", "USA", countryName)) %>% 
   mutate(countryName = ifelse(noc == "ALG", "Algeria", countryName)) %>% 
   mutate(countryName = ifelse(noc == "BAH", "Bahamas", countryName)) %>%
   mutate(countryName = ifelse(noc == "BUL", "Bulgaria", countryName)) %>%
   mutate(countryName = ifelse(noc == "CHI", "Chile", countryName)) %>%
   mutate(countryName = ifelse(noc == "CRC", "Costa Rica", countryName)) %>%
   mutate(countryName = ifelse(noc == "CRO", "Croatia", countryName)) %>%
   mutate(countryName = ifelse(noc == "DEN", "Denmark", countryName)) %>%
   mutate(countryName = ifelse(noc == "FIJ", "Fiji", countryName)) %>%
   mutate(countryName = ifelse(noc == "GER", "Germany", countryName)) %>%
   mutate(countryName = ifelse(noc == "GRE", "Greece", countryName)) %>%
   mutate(countryName = ifelse(noc == "GRN", "Grenada", countryName)) %>%
   mutate(countryName = ifelse(noc == "HAI", "Haiti", countryName)) %>%
   mutate(countryName = ifelse(noc == "INA", "Indonesia", countryName)) %>%
   mutate(countryName = ifelse(noc == "IRI", "Iran", countryName)) %>%
   mutate(countryName = ifelse(noc == "LAT", "Latvia", countryName)) %>%
   mutate(countryName = ifelse(noc == "MGL", "Mongolia", countryName)) %>%
   mutate(countryName = ifelse(noc == "NED", "Netherlands", countryName)) %>%
   mutate(countryName = ifelse(noc == "NGR", "Nigeria", countryName)) %>%
   mutate(countryName = ifelse(noc == "POR", "Portugal", countryName)) %>%
   mutate(countryName = ifelse(noc == "PUR", "Puerto Rico", countryName)) %>%
   mutate(countryName = ifelse(noc == "RSA", "South Africa", countryName)) %>%
   mutate(countryName = ifelse(noc == "SLO", "Slovenia", countryName)) %>%
   mutate(countryName = ifelse(noc == "SUI", "Switzerland", countryName)) %>%
   mutate(countryName = ifelse(noc == "UAE", "United Arab Emirates", countryName)) %>%
   mutate(countryName = ifelse(noc == "URU", "Uruguay", countryName)) %>%
   mutate(countryName = ifelse(noc == "VIE", "Vietnam", countryName)) %>%
   mutate(countryName = ifelse(noc == "ZIM", "Zimbabwe", countryName)) %>% 
drop_na(countryName)
## Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some values were not matched unambiguously: AHO, ALG, ANG, ANT, ANZ, ARU, ASA, BAH, BAN, BAR, BER, BHU, BIZ, BOH, BOT, BRU, BUL, BUR, CAM, CAY, CGO, CHA, CHI, CRC, CRO, CRT, DEN, ESA, EUN, FIJ, FRG, GAM, GBS, GDR, GEQ, GER, GRE, GRN, GUA, GUI, HAI, HON, INA, IOA, IRI, ISV, IVB, KOS, KSA, KUW, LAT, LBA, LES, LIB, MAD, MAL, MAS, MAW, MGL, MON, MRI, MTN, MYA, NBO, NCA, NED, NEP, NFL, NGR, NIG, OMA, PAR, PHI, PLE, POR, PUR, RHO, ROT, RSA, SAA, SAM, SCG, SEY, SKN, SLO, SOL, SRI, SUD, SUI, TAN, TCH, TGA, TOG, TPE, UAE, UAR, UNK, URS, URU, VAN, VIE, VIN, WIF, YAR, YMD, YUG, ZAM, ZIM
prop_of_medals<-summer_all %>% 
  drop_na(medal) %>% 
  filter(countryName %in% c("USA", "United Kingdom", "Germany", "France", "Italy", "Australia", "Hungary", "Sweden", "Netherlands", "China")) %>% 
  count(countryName, medal) %>% 
  group_by(countryName) %>% 
  mutate(proportion=n/sum(n)) 

graph1<-prop_of_medals %>% 
  ggplot(aes(y=countryName,x=proportion,fill=fct_relevel(medal,c("Bronze","Silver"))))+
  scale_fill_manual(values=c("Bronze"="#a17419",Silver = "#b7b7b7", Gold = "#d5a500"))+
  geom_col()+
  labs(title="Distribution of medals won by the top 10 countries",y="",x="Proportion", fill=NULL)



graph2<-prop_of_medals %>% 
  ggplot(aes(y=fct_reorder(countryName,n,sum),x=n,fill=fct_relevel(medal,c("Bronze","Silver"))))+
  scale_fill_manual(values=c("Bronze"="#a17419",Silver = "#b7b7b7", Gold = "#d5a500"))+
  geom_col()+
  theme(legend.title = element_blank())+
  labs(title = "Number of medals won by the top 10 countries", y="", x="Count", fill=NULL)



ggplotly(graph1)
graph2